Code
ks_care_facility <- read_rds("./Data/cleaned_ks/care_facility.rds")
ks_care_facility <- ks_care_facility |>
rename(
program = `Program Type`,
n_facility = `Total Facilities`,
n_capacity = `Total Capacity`
)understand the current child care landscape in Kansas, through analysis of data provided
assess the child care landscapes of states comparable to Kansas through student research and data collection of comparable datasets to Kansas
draw conclusions based on the child care landscapes of Kansas and selected states, predict an outcome for Kansas through 1-year, 3-year, and 5-year trend analysis.
First, Kansas data is broken up into different types of child care licensing. It would be useful to visualize the difference between the types of child care centers, and where the individual centers are throughout the state.
ks_care_facility <- read_rds("./Data/cleaned_ks/care_facility.rds")
ks_care_facility <- ks_care_facility |>
rename(
program = `Program Type`,
n_facility = `Total Facilities`,
n_capacity = `Total Capacity`
)Year trend visualization - Number of facility and capacity
# n of facility trend
ggplot(ks_care_facility, aes(x = year, y = n_facility)) +
geom_col() +
facet_wrap( ~ program, scales = "free")# n of capacity trend
ggplot(ks_care_facility, aes(x = year, y = n_capacity)) +
geom_col() +
facet_wrap( ~ program, scales = "free")Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_col()`).
Year program trend visualization - N of facility and capacity
#facility
ggplot(ks_care_facility, aes(x = (n_facility), y = program)) +
geom_col() +
geom_text(
aes(label = n_facility, x = n_facility + 10)
) +
facet_wrap( ~ year)#capacity
ggplot(ks_care_facility, aes(x = (n_capacity), y = program)) +
geom_col() +
geom_text(
aes(label = n_capacity, x = n_capacity + 10)
) +
facet_wrap( ~ year)data import
ks_facility <- read_rds("./Data/cleaned_ks/ks_facility.rds")desired capacity vs licensed capacity - Year of 2024
ggplot(ks_facility, aes(x = (n_desired_cap), y = care_facility)) +
geom_col(aes(fill = "N of desired capacity"),
width = 0.25,
position = position_nudge(y = -0.25)) +
geom_col(aes(x = n_licensed_cap, fill = "N of licensed capacity"),
width = 0.25,
position = position_nudge(y = 0)) +
geom_col(aes(x = n_child_enrolled, fill = "N of enrolled children"),
width = 0.25,
position = position_nudge(y = 0.25)) +
labs(
title = "KS 2024 status of childcare facility",
x = " ",
y = "Type of childcare facility",
fill = " "
)ks_family_rate <- read_rds("./Data/cleaned_ks/ks_family_rate.rds")
ks_census <- read_rds("./Data/cleaned_ks/ks_census_cleaned.rds")
ks_census <- ks_census |>
mutate(
median_h_income_monthly = median_h_income / 12
)
#montly rate
ggplot(ks_family_rate, aes(x = avg_part_monthly, y = age_group)) +
geom_col(aes(fill = "Average monthly part-time rate"),
width = 0.3,
position = position_nudge(y = -0.25)) +
geom_col(aes(x = avg_full_monthly, fill = "Average monthly full-time rate"),
width = 0.3,
position = position_nudge(y = 0)) +
geom_text(
aes(
label = paste0(round(avg_full_monthly/ks_census$median_h_income_monthly, digit = 2)*100, "%", "of monthly income"),
x = 200),
size = 2.5
) +
labs(
title = "KS 2024 Family Child Care Rate",
y = "Age Group",
x = "Rate",
fill = " "
)ks_child_rate <- read_rds("./Data/cleaned_ks/ks_child_rate.rds")
#montly rate
ggplot(ks_child_rate, aes(x = avg_part_monthly, y = age_group)) +
geom_col(aes(fill = "Average monthly part-time rate"),
width = 0.3,
position = position_nudge(y = -0.25)) +
geom_col(aes(x = avg_full_monthly, fill = "Average monthly full-time rate"),
width = 0.3,
position = position_nudge(y = 0)) +
geom_text(
aes(
label = paste0(round(avg_full_monthly/ks_census$median_h_income_monthly, digit = 2)*100, "%", "of monthly income"),
x = 300
),
size = 2.5,
) +
labs(
title = "KS 2024 Child Care Center Rate",
y = "Age Group",
x = "Rate",
fill = " "
)data import - 2024 year
county_facility <- read_rds("./Data/cleaned_ks/county_facility.rds")Desired capacity vs licensed capacity - Year of 2024
county_facility <- county_facility |>
mutate(
n_disc_cap = n_licensed_cap - n_desired_cap
)
county_facility |>
summarise(
mean = mean(n_disc_cap),
sd = sd(n_disc_cap),
.by = c(county, care_facility)
)# A tibble: 525 × 4
county care_facility mean sd
<fct> <fct> <dbl> <dbl>
1 ALLEN child care centers 41 NA
2 ALLEN licesned family/group 34 NA
3 ALLEN head start 0 NA
4 ALLEN preschool 0 NA
5 ALLEN school age program 0 NA
6 ANDERSON child care centers 0 NA
7 ANDERSON licesned family/group 15 NA
8 ANDERSON head start 2 NA
9 ANDERSON preschool 0 NA
10 ANDERSON school age program 0 NA
# ℹ 515 more rows
county_facility |>
filter(
n_disc_cap < 0
) # A tibble: 1 × 8
county care_facility n_child_enrolled n_desired_cap n_licensed_cap n_dcf
<fct> <fct> <dbl> <dbl> <dbl> <dbl>
1 MCPHERSON head start 142 159 156 10
# ℹ 2 more variables: n_food_program <dbl>, n_disc_cap <dbl>
ks_t_child <- read_xlsx("./Data/datacenter/KS Childcare Centers.xlsx")
ks_t_child <- ks_t_child |>
mutate(
across(
.cols = c(`2017`:`2022`),
.fns = ~ as.numeric(.)
)
) |>
pivot_longer(
cols = -c(Location, `Data Type`),
names_to = "year",
values_to = "n_child_facility"
) |>
mutate(
year = as.numeric(year)
)
ggplot(ks_t_child, aes(x = year, y = n_child_facility)) +
geom_col() +
facet_wrap_paginate( ~ Location, scales = "free", ncol = 5, nrow = 3, page = 1)Child Care Centers - Kansas
pro_child <- ks_t_child |>
summarise(
n_child_facility = sum(n_child_facility, na.rm = TRUE),
.by = "year"
)
ts_data <- ts(pro_child$n_child_facility, start = 2017, frequency = 1)
fit_child <- auto.arima(ts_data)
summary(fit_child)Series: ts_data
ARIMA(0,1,0) with drift
Coefficients:
drift
10.400
s.e. 1.992
sigma^2 = 24.89: log likelihood = -14.56
AIC=33.13 AICc=39.13 BIC=32.35
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.09893328 4.073335 2.9656 0.007204792 0.4625464 0.2851538
ACF1
Training set -0.1583016
future_child <- forecast(fit_child, h = 5)
print(future_child) Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
2023 666.4 660.0066 672.7934 656.6221 676.1779
2024 676.8 667.7584 685.8416 662.9720 690.6280
2025 687.2 676.1263 698.2737 670.2642 704.1358
2026 697.6 684.8132 710.3868 678.0443 717.1557
2027 708.0 693.7039 722.2961 686.1360 729.8640
plot(future_child, main = "ARIMA Forecast of Facilities", xlab = "Year", ylab = "Number of Facilities")Child Care Centers - County Level - KS
n_county <- length(unique(ks_t_child$Location))
#105 counties
ks_t_child_county <- ks_t_child |>
filter(
n_child_facility > 0,
.by = Location
) |>
filter(
n() == 6,
.by = Location
)
n_county_ts <- length(unique(ks_t_child_county$Location))
county_ts <- unique(ks_t_child_county$Location)
#59 counties
#46 counties have no child care centers in one of the years from 2017 to 2022
#These counties cannot fit projection
for(i in seq(n_county_ts)){
pro_child_county <- ks_t_child_county |>
filter(Location == county_ts[i])
ts_data <- ts(pro_child_county$n_child_facility, start = 2017, frequency = 1)
fit_child_county <- auto.arima(ts_data)
future_child_county <- forecast(fit_child_county, h = 5)
plot(
future_child_county,
main = paste0("ARIMA Forecast of Childcare Facilities - ", county_ts[i]),
xlab = "Year",
ylab = "Number of Facilities")
}ks_t_family <- read_xlsx("./Data/datacenter/KS Family care centers.xlsx")
ks_t_family <- ks_t_family |>
mutate(
across(
.cols = `2017`:`2022`,
.fns = ~ as.numeric(.)
)
) |>
pivot_longer(
cols = -c(Location, `Data Type`),
names_to = "year",
values_to = "n_family_facility"
)
ggplot(ks_t_family, aes(x = year, y = n_family_facility)) +
geom_col() +
facet_wrap_paginate( ~ Location, scales = "free", ncol = 5, nrow = 3, page = 2)Family care centers - KS
pro_family <- ks_t_family |>
summarise(
n_family_facility = sum(n_family_facility, na.rm = TRUE),
.by = "year"
)
ts_data <- ts(pro_family$n_family_facility, start = 2017, frequency = 1)
fit_family <- auto.arima(ts_data)
summary(fit_family)Series: ts_data
ARIMA(0,1,0)
sigma^2 = 67758: log likelihood = -34.9
AIC=71.81 AICc=73.14 BIC=71.42
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -170.1257 237.624 176.8743 -4.796147 4.98014 0.8366809 -0.2919201
future_family <- forecast(fit_family, h = 5)
print(future_family) Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
2023 3221 2887.407 3554.593 2710.813 3731.187
2024 3221 2749.228 3692.772 2499.487 3942.513
2025 3221 2643.200 3798.800 2337.331 4104.669
2026 3221 2553.814 3888.186 2200.627 4241.373
2027 3221 2475.063 3966.937 2080.188 4361.812
plot(future_family, main = "ARIMA Forecast of Facilities", xlab = "Year", ylab = "Number of Family Facilities")n_county <- length(unique(ks_t_family$Location))
#105 counties
ks_t_family_county <- ks_t_family |>
filter(
n_family_facility > 0,
.by = Location
) |>
filter(
n() == 6,
.by = Location
)
n_county_ts <- length(unique(ks_t_family_county$Location))
county_ts <- unique(ks_t_family_county$Location)
#all counties have more than one family care centers
for(i in seq(n_county_ts)){
pro_family_county <- ks_t_family_county |>
filter(Location == county_ts[i])
ts_data <- ts(pro_family_county$n_family_facility, start = 2017, frequency = 1)
fit_family_county <- auto.arima(ts_data)
future_family_county <- forecast(fit_family_county, h = 5)
plot(
future_family_county,
main = paste0("ARIMA Forecast of Family care Facilities - ", county_ts[i]),
xlab = "Year",
ylab = "Number of Facilities")
}Pre-Kindergarten and 4-year-old At Risk Programs is the percent of public elementary schools that offer pre-kindergarten or 4-year-old At-Risk program five days a week. Public non-elementary buildings with such programs are also included in the numerator and the denominator. Data are provided by the Kansas State Department of Education. The current rate represents academic year 2022-23.
ks_t_pre_k <- read_xlsx("./Data/datacenter/KS Pre-kindergarten.xlsx")
ks_t_pre_k <- ks_t_pre_k |>
rename(
year = TimeFrame,
rate_pre_k = Data
)
ggplot(ks_t_pre_k |> filter(Location == "Kansas"), aes(x = year, y = rate_pre_k)) +
geom_col() Head Start is the number of Head Start enrollment slots available per 100 children 3–4 years of age living in families with income below the U.S. poverty threshold.
ks_t_head <- read_xlsx("./Data/datacenter/KS Head Start.xlsx")
ks_t_head <- ks_t_head |>
rename(
year = TimeFrame,
slot_per_100c = Data
) |>
mutate(
slot_per_100c = as.numeric(slot_per_100c)
)
ggplot(ks_t_head |> filter(Location == "Kansas"), aes(x = year, y = slot_per_100c)) +
geom_col()Early Head Start is the number of Early Head Start slots available per 100 children from birth through 3 years of age living in families with incomes below the U.S. poverty threshold.
ks_t_early_head <- read_xlsx("./Data/datacenter/KS Early Head Start.xlsx")
ks_t_early_head <- ks_t_early_head |>
rename(
year = TimeFrame,
slot_per_100c = Data
) |>
mutate(
slot_per_100c = as.numeric(slot_per_100c)
)
ggplot(ks_t_early_head |> filter(Location == "Kansas"), aes(x = year, y = slot_per_100c)) +
geom_col()map counties with 0 child care centers - along with the child population who needs childcare centers
#county census data - N of children
county_census_cleaned <- read_rds("./Data/cleaned_ks/county_census_clened.rds")
#county care facility 2022 - 2024 number of facility
county_care_facility_long <- read_rds("./Data/cleaned_ks/county_care_facility_year.rds")
county_care_2024 <- county_care_facility_long |>
filter(
year == 2024
)
#kansas county child care centers data for 2024
ks_county_cc <- county_facility |>
filter(
care_facility == "child care centers"
) |>
left_join(county_census_cleaned, by = "county") |>
left_join(county_care_2024 |> dplyr::select(county:child_care), by = "county") |>
mutate(
county = str_to_title(county),
name = county
) |>
select(-county)
#kansas county geojson file import
ks_county_json <- geojsonio::geojson_read("./Data/kansas-with-county-boundaries_1099.geojson", what = "sp")
#merge data
ks_county_map <- sp::merge(ks_county_json, ks_county_cc, by.x = "name", by.y = "name", duplicateGeoms = TRUE)m <- leaflet(ks_county_map) |>
addProviderTiles(
"MapBox",
options = providerTileOptions(
id = "mapbox.light",
accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))
bins <- c(0, 1, 5, 10, 50, 100, 200)
pals <- colorBin(
"YlOrRd",
domain = ks_county_map$child_care,
bins = bins,
reverse = TRUE,
pretty = TRUE
)
labels <- sprintf(
"<strong>%s</strong><br/>%g childcare <br/>N of children who needs childcare: %g <br/>N of children enrolled in childcare: %g <br/> N of desired capacity: %g <br/> N of licensed capacity: %g",
ks_county_map$name, ks_county_map$child_care, ks_county_map$n_child_under_6_lab, ks_county_map$n_child_enrolled, ks_county_map$n_desired_cap, ks_county_map$n_licensed_cap
) %>% lapply(htmltools::HTML)
m <- m |> addPolygons(
fillColor = ~pals(child_care),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlightOptions = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
)
)
m |> addLegend(
pal = pals,
values = ~child_care,
title = "N of childcare center",
opacity = 0.7,
position = "bottomright"
)#kansas county family care centers data for 2024
ks_county_fc <- county_facility |>
filter(
care_facility == "licesned family/group"
) |>
left_join(county_census_cleaned, by = "county") |>
left_join(county_care_2024 |> dplyr::select(county, year, licesned_family), by = "county") |>
mutate(
county = str_to_title(county),
name = county
) |>
select(-county)
#merge data
ks_county_map_fc <- sp::merge(ks_county_json, ks_county_fc, by.x = "name", by.y = "name", duplicateGeoms = TRUE)m2 <- leaflet(ks_county_map_fc) |>
addProviderTiles(
"MapBox",
options = providerTileOptions(
id = "mapbox.light",
accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))
bins_fc <- c(0, 10, 50, 100, 250, 500)
pals_fc <- colorBin(
"YlOrRd",
domain = ks_county_map_fc$licesned_family,
bins = bins_fc,
reverse = TRUE,
pretty = TRUE,
na.color = "gray"
)
labels <- sprintf(
"<strong>%s</strong><br/>%g family care center <br/>N of children who needs childcare: %g <br/>N of children enrolled in family care: %g <br/> N of desired capacity: %g <br/> N of licensed capacity: %g",
ks_county_map_fc$name, ks_county_map_fc$licesned_family, ks_county_map_fc$n_child_under_6_lab, ks_county_map_fc$n_child_enrolled, ks_county_map_fc$n_desired_cap, ks_county_map_fc$n_licensed_cap
) %>% lapply(htmltools::HTML)
m2 <- m2 |> addPolygons(
fillColor = ~pals_fc(licesned_family),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlightOptions = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
)
)
m2 |> addLegend(
pal = pals_fc,
values = ~licesned_family,
title = "N of family care center",
opacity = 0.7,
position = "bottomright"
)Childcare centers per 1000 child population.
1 indicates 1 child care centers per 1000 child in a county
county_care_2022 <- county_care_facility_long |>
filter(
year == 2022
)
#kansas county child care centers data for 2024
ks_county_22 <- county_facility |>
filter(
care_facility == "child care centers"
) |>
left_join(county_census_cleaned, by = "county") |>
left_join(county_care_2022 |> dplyr::select(county:child_care), by = "county") |>
mutate(
county = str_to_title(county),
name = county,
child_care_per_thou = round((child_care/n_residents_under_6)*1000, digits = 2)
) |>
select(-county)
#kansas county geojson file import
ks_county_json <- geojsonio::geojson_read("./Data/kansas-with-county-boundaries_1099.geojson", what = "sp")
#merge data
ks_county_map_22 <- sp::merge(ks_county_json, ks_county_22, by.x = "name", by.y = "name", duplicateGeoms = TRUE)m_22 <- leaflet(ks_county_map_22) |>
addProviderTiles(
"MapBox",
options = providerTileOptions(
id = "mapbox.light",
accessToken = Sys.getenv('MAPBOX_ACCESS_TOKEN')))
bins <- c(0, 1, 2, 3, 4, 5, 10)
pals <- colorBin(
"YlOrRd",
domain = ks_county_map_22$child_care_per_thou,
bins = bins,
reverse = TRUE,
pretty = TRUE
)
labels <- sprintf(
"<strong>%s</strong><br/>%g childcare per 1000 child pop in 2022 <br/>N of children who needs childcare: %g <br/>Child Population: %g <br/> N of childcare centers: %g",
ks_county_map_22$name, ks_county_map_22$child_care_per_thou, ks_county_map_22$n_child_under_6_lab, ks_county_map_22$n_residents_under_6, ks_county_map_22$child_care
) %>% lapply(htmltools::HTML)
m_22 <- m_22 |> addPolygons(
fillColor = ~pals(child_care_per_thou),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlightOptions = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto"
)
)
m_22 |> addLegend(
pal = pals,
values = ~child_care,
title = "N of childcare center per 1000 child pop - 2022",
opacity = 0.7,
position = "bottomright"
)